1 package org.apache.lucene.index;
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20 import java.io.IOException;
21 import java.util.ArrayList;
22 import java.util.Collection;
23 import java.util.List;
24 import java.util.Locale;
25 import java.util.Map;
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47 public abstract class LogMergePolicy extends MergePolicy {
48
49
50
51
52
53 public static final double LEVEL_LOG_SPAN = 0.75;
54
55
56
57 public static final int DEFAULT_MERGE_FACTOR = 10;
58
59
60
61 public static final int DEFAULT_MAX_MERGE_DOCS = Integer.MAX_VALUE;
62
63
64
65
66 public static final double DEFAULT_NO_CFS_RATIO = 0.1;
67
68
69 protected int mergeFactor = DEFAULT_MERGE_FACTOR;
70
71
72
73
74 protected long minMergeSize;
75
76
77
78 protected long maxMergeSize;
79
80
81
82
83
84 protected long maxMergeSizeForForcedMerge = Long.MAX_VALUE;
85
86
87
88 protected int maxMergeDocs = DEFAULT_MAX_MERGE_DOCS;
89
90
91
92 protected boolean calibrateSizeByDeletes = true;
93
94
95
/**
 * Creates the policy, passing {@link #DEFAULT_NO_CFS_RATIO} and
 * {@code MergePolicy.DEFAULT_MAX_CFS_SEGMENT_SIZE} to the superclass constructor.
 */
public LogMergePolicy() {
  super(DEFAULT_NO_CFS_RATIO, MergePolicy.DEFAULT_MAX_CFS_SEGMENT_SIZE);
}
99
100
101
102 protected boolean verbose(IndexWriter writer) {
103 return writer != null && writer.infoStream.isEnabled("LMP");
104 }
105
106
107
108 protected void message(String message, IndexWriter writer) {
109 if (verbose(writer)) {
110 writer.infoStream.message("LMP", message);
111 }
112 }
113
114
115
116
117 public int getMergeFactor() {
118 return mergeFactor;
119 }
120
121
122
123
124
125
126
127
128
129
130 public void setMergeFactor(int mergeFactor) {
131 if (mergeFactor < 2)
132 throw new IllegalArgumentException("mergeFactor cannot be less than 2");
133 this.mergeFactor = mergeFactor;
134 }
135
136
137
138 public void setCalibrateSizeByDeletes(boolean calibrateSizeByDeletes) {
139 this.calibrateSizeByDeletes = calibrateSizeByDeletes;
140 }
141
142
143
144 public boolean getCalibrateSizeByDeletes() {
145 return calibrateSizeByDeletes;
146 }
147
148
149
150
151
152 protected long sizeDocs(SegmentCommitInfo info, IndexWriter writer) throws IOException {
153 if (calibrateSizeByDeletes) {
154 int delCount = writer.numDeletedDocs(info);
155 assert delCount <= info.info.maxDoc();
156 return (info.info.maxDoc() - (long)delCount);
157 } else {
158 return info.info.maxDoc();
159 }
160 }
161
162
163
164
165
166 protected long sizeBytes(SegmentCommitInfo info, IndexWriter writer) throws IOException {
167 if (calibrateSizeByDeletes) {
168 return super.size(info, writer);
169 }
170 return info.sizeInBytes();
171 }
172
173
174
175
176 protected boolean isMerged(SegmentInfos infos, int maxNumSegments, Map<SegmentCommitInfo,Boolean> segmentsToMerge, IndexWriter writer) throws IOException {
177 final int numSegments = infos.size();
178 int numToMerge = 0;
179 SegmentCommitInfo mergeInfo = null;
180 boolean segmentIsOriginal = false;
181 for(int i=0;i<numSegments && numToMerge <= maxNumSegments;i++) {
182 final SegmentCommitInfo info = infos.info(i);
183 final Boolean isOriginal = segmentsToMerge.get(info);
184 if (isOriginal != null) {
185 segmentIsOriginal = isOriginal;
186 numToMerge++;
187 mergeInfo = info;
188 }
189 }
190
191 return numToMerge <= maxNumSegments &&
192 (numToMerge != 1 || !segmentIsOriginal || isMerged(infos, mergeInfo, writer));
193 }
194
195
196
197
198
199
200
201
202
203 private MergeSpecification findForcedMergesSizeLimit(
204 SegmentInfos infos, int maxNumSegments, int last, IndexWriter writer) throws IOException {
205 MergeSpecification spec = new MergeSpecification();
206 final List<SegmentCommitInfo> segments = infos.asList();
207
208 int start = last - 1;
209 while (start >= 0) {
210 SegmentCommitInfo info = infos.info(start);
211 if (size(info, writer) > maxMergeSizeForForcedMerge || sizeDocs(info, writer) > maxMergeDocs) {
212 if (verbose(writer)) {
213 message("findForcedMergesSizeLimit: skip segment=" + info + ": size is > maxMergeSize (" + maxMergeSizeForForcedMerge + ") or sizeDocs is > maxMergeDocs (" + maxMergeDocs + ")", writer);
214 }
215
216
217 if (last - start - 1 > 1 || (start != last - 1 && !isMerged(infos, infos.info(start + 1), writer))) {
218
219
220 spec.add(new OneMerge(segments.subList(start + 1, last)));
221 }
222 last = start;
223 } else if (last - start == mergeFactor) {
224
225 spec.add(new OneMerge(segments.subList(start, last)));
226 last = start;
227 }
228 --start;
229 }
230
231
232
233 if (last > 0 && (++start + 1 < last || !isMerged(infos, infos.info(start), writer))) {
234 spec.add(new OneMerge(segments.subList(start, last)));
235 }
236
237 return spec.merges.size() == 0 ? null : spec;
238 }
239
240
241
242
243
244
245 private MergeSpecification findForcedMergesMaxNumSegments(SegmentInfos infos, int maxNumSegments, int last, IndexWriter writer) throws IOException {
246 MergeSpecification spec = new MergeSpecification();
247 final List<SegmentCommitInfo> segments = infos.asList();
248
249
250
251 while (last - maxNumSegments + 1 >= mergeFactor) {
252 spec.add(new OneMerge(segments.subList(last - mergeFactor, last)));
253 last -= mergeFactor;
254 }
255
256
257
258 if (0 == spec.merges.size()) {
259 if (maxNumSegments == 1) {
260
261
262
263 if (last > 1 || !isMerged(infos, infos.info(0), writer)) {
264 spec.add(new OneMerge(segments.subList(0, last)));
265 }
266 } else if (last > maxNumSegments) {
267
268
269
270
271
272
273
274
275
276
277 final int finalMergeSize = last - maxNumSegments + 1;
278
279
280 long bestSize = 0;
281 int bestStart = 0;
282
283 for(int i=0;i<last-finalMergeSize+1;i++) {
284 long sumSize = 0;
285 for(int j=0;j<finalMergeSize;j++) {
286 sumSize += size(infos.info(j+i), writer);
287 }
288 if (i == 0 || (sumSize < 2*size(infos.info(i-1), writer) && sumSize < bestSize)) {
289 bestStart = i;
290 bestSize = sumSize;
291 }
292 }
293
294 spec.add(new OneMerge(segments.subList(bestStart, bestStart + finalMergeSize)));
295 }
296 }
297 return spec.merges.size() == 0 ? null : spec;
298 }
299
300
301
302
303
304
305
306
307
308
309
310 @Override
311 public MergeSpecification findForcedMerges(SegmentInfos infos,
312 int maxNumSegments, Map<SegmentCommitInfo,Boolean> segmentsToMerge, IndexWriter writer) throws IOException {
313
314 assert maxNumSegments > 0;
315 if (verbose(writer)) {
316 message("findForcedMerges: maxNumSegs=" + maxNumSegments + " segsToMerge="+ segmentsToMerge, writer);
317 }
318
319
320
321 if (isMerged(infos, maxNumSegments, segmentsToMerge, writer)) {
322 if (verbose(writer)) {
323 message("already merged; skip", writer);
324 }
325 return null;
326 }
327
328
329
330
331 int last = infos.size();
332 while (last > 0) {
333 final SegmentCommitInfo info = infos.info(--last);
334 if (segmentsToMerge.get(info) != null) {
335 last++;
336 break;
337 }
338 }
339
340 if (last == 0) {
341 if (verbose(writer)) {
342 message("last == 0; skip", writer);
343 }
344 return null;
345 }
346
347
348 if (maxNumSegments == 1 && last == 1 && isMerged(infos, infos.info(0), writer)) {
349 if (verbose(writer)) {
350 message("already 1 seg; skip", writer);
351 }
352 return null;
353 }
354
355
356 boolean anyTooLarge = false;
357 for (int i = 0; i < last; i++) {
358 SegmentCommitInfo info = infos.info(i);
359 if (size(info, writer) > maxMergeSizeForForcedMerge || sizeDocs(info, writer) > maxMergeDocs) {
360 anyTooLarge = true;
361 break;
362 }
363 }
364
365 if (anyTooLarge) {
366 return findForcedMergesSizeLimit(infos, maxNumSegments, last, writer);
367 } else {
368 return findForcedMergesMaxNumSegments(infos, maxNumSegments, last, writer);
369 }
370 }
371
372
373
374
375
376
377 @Override
378 public MergeSpecification findForcedDeletesMerges(SegmentInfos segmentInfos, IndexWriter writer)
379 throws IOException {
380 final List<SegmentCommitInfo> segments = segmentInfos.asList();
381 final int numSegments = segments.size();
382
383 if (verbose(writer)) {
384 message("findForcedDeleteMerges: " + numSegments + " segments", writer);
385 }
386
387 MergeSpecification spec = new MergeSpecification();
388 int firstSegmentWithDeletions = -1;
389 assert writer != null;
390 for(int i=0;i<numSegments;i++) {
391 final SegmentCommitInfo info = segmentInfos.info(i);
392 int delCount = writer.numDeletedDocs(info);
393 if (delCount > 0) {
394 if (verbose(writer)) {
395 message(" segment " + info.info.name + " has deletions", writer);
396 }
397 if (firstSegmentWithDeletions == -1)
398 firstSegmentWithDeletions = i;
399 else if (i - firstSegmentWithDeletions == mergeFactor) {
400
401
402 if (verbose(writer)) {
403 message(" add merge " + firstSegmentWithDeletions + " to " + (i-1) + " inclusive", writer);
404 }
405 spec.add(new OneMerge(segments.subList(firstSegmentWithDeletions, i)));
406 firstSegmentWithDeletions = i;
407 }
408 } else if (firstSegmentWithDeletions != -1) {
409
410
411
412 if (verbose(writer)) {
413 message(" add merge " + firstSegmentWithDeletions + " to " + (i-1) + " inclusive", writer);
414 }
415 spec.add(new OneMerge(segments.subList(firstSegmentWithDeletions, i)));
416 firstSegmentWithDeletions = -1;
417 }
418 }
419
420 if (firstSegmentWithDeletions != -1) {
421 if (verbose(writer)) {
422 message(" add merge " + firstSegmentWithDeletions + " to " + (numSegments-1) + " inclusive", writer);
423 }
424 spec.add(new OneMerge(segments.subList(firstSegmentWithDeletions, numSegments)));
425 }
426
427 return spec;
428 }
429
430 private static class SegmentInfoAndLevel implements Comparable<SegmentInfoAndLevel> {
431 SegmentCommitInfo info;
432 float level;
433 int index;
434
435 public SegmentInfoAndLevel(SegmentCommitInfo info, float level, int index) {
436 this.info = info;
437 this.level = level;
438 this.index = index;
439 }
440
441
442 @Override
443 public int compareTo(SegmentInfoAndLevel other) {
444 return Float.compare(other.level, level);
445 }
446 }
447
448
449
450
451
452
453
454
455 @Override
456 public MergeSpecification findMerges(MergeTrigger mergeTrigger, SegmentInfos infos, IndexWriter writer) throws IOException {
457
458 final int numSegments = infos.size();
459 if (verbose(writer)) {
460 message("findMerges: " + numSegments + " segments", writer);
461 }
462
463
464
465 final List<SegmentInfoAndLevel> levels = new ArrayList<>(numSegments);
466 final float norm = (float) Math.log(mergeFactor);
467
468 final Collection<SegmentCommitInfo> mergingSegments = writer.getMergingSegments();
469
470 for(int i=0;i<numSegments;i++) {
471 final SegmentCommitInfo info = infos.info(i);
472 long size = size(info, writer);
473
474
475 if (size < 1) {
476 size = 1;
477 }
478
479 final SegmentInfoAndLevel infoLevel = new SegmentInfoAndLevel(info, (float) Math.log(size)/norm, i);
480 levels.add(infoLevel);
481
482 if (verbose(writer)) {
483 final long segBytes = sizeBytes(info, writer);
484 String extra = mergingSegments.contains(info) ? " [merging]" : "";
485 if (size >= maxMergeSize) {
486 extra += " [skip: too large]";
487 }
488 message("seg=" + writer.segString(info) + " level=" + infoLevel.level + " size=" + String.format(Locale.ROOT, "%.3f MB", segBytes/1024/1024.) + extra, writer);
489 }
490 }
491
492 final float levelFloor;
493 if (minMergeSize <= 0)
494 levelFloor = (float) 0.0;
495 else
496 levelFloor = (float) (Math.log(minMergeSize)/norm);
497
498
499
500
501
502
503
504
505 MergeSpecification spec = null;
506
507 final int numMergeableSegments = levels.size();
508
509 int start = 0;
510 while(start < numMergeableSegments) {
511
512
513
514 float maxLevel = levels.get(start).level;
515 for(int i=1+start;i<numMergeableSegments;i++) {
516 final float level = levels.get(i).level;
517 if (level > maxLevel) {
518 maxLevel = level;
519 }
520 }
521
522
523
524 float levelBottom;
525 if (maxLevel <= levelFloor) {
526
527 levelBottom = -1.0F;
528 } else {
529 levelBottom = (float) (maxLevel - LEVEL_LOG_SPAN);
530
531
532 if (levelBottom < levelFloor && maxLevel >= levelFloor) {
533 levelBottom = levelFloor;
534 }
535 }
536
537 int upto = numMergeableSegments-1;
538 while(upto >= start) {
539 if (levels.get(upto).level >= levelBottom) {
540 break;
541 }
542 upto--;
543 }
544 if (verbose(writer)) {
545 message(" level " + levelBottom + " to " + maxLevel + ": " + (1+upto-start) + " segments", writer);
546 }
547
548
549 int end = start + mergeFactor;
550 while(end <= 1+upto) {
551 boolean anyTooLarge = false;
552 boolean anyMerging = false;
553 for(int i=start;i<end;i++) {
554 final SegmentCommitInfo info = levels.get(i).info;
555 anyTooLarge |= (size(info, writer) >= maxMergeSize || sizeDocs(info, writer) >= maxMergeDocs);
556 if (mergingSegments.contains(info)) {
557 anyMerging = true;
558 break;
559 }
560 }
561
562 if (anyMerging) {
563
564 } else if (!anyTooLarge) {
565 if (spec == null)
566 spec = new MergeSpecification();
567 final List<SegmentCommitInfo> mergeInfos = new ArrayList<>(end-start);
568 for(int i=start;i<end;i++) {
569 mergeInfos.add(levels.get(i).info);
570 assert infos.contains(levels.get(i).info);
571 }
572 if (verbose(writer)) {
573 message(" add merge=" + writer.segString(mergeInfos) + " start=" + start + " end=" + end, writer);
574 }
575 spec.add(new OneMerge(mergeInfos));
576 } else if (verbose(writer)) {
577 message(" " + start + " to " + end + ": contains segment over maxMergeSize or maxMergeDocs; skipping", writer);
578 }
579
580 start = end;
581 end = start + mergeFactor;
582 }
583
584 start = 1+upto;
585 }
586
587 return spec;
588 }
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605 public void setMaxMergeDocs(int maxMergeDocs) {
606 this.maxMergeDocs = maxMergeDocs;
607 }
608
609
610
611
612 public int getMaxMergeDocs() {
613 return maxMergeDocs;
614 }
615
616 @Override
617 public String toString() {
618 StringBuilder sb = new StringBuilder("[" + getClass().getSimpleName() + ": ");
619 sb.append("minMergeSize=").append(minMergeSize).append(", ");
620 sb.append("mergeFactor=").append(mergeFactor).append(", ");
621 sb.append("maxMergeSize=").append(maxMergeSize).append(", ");
622 sb.append("maxMergeSizeForForcedMerge=").append(maxMergeSizeForForcedMerge).append(", ");
623 sb.append("calibrateSizeByDeletes=").append(calibrateSizeByDeletes).append(", ");
624 sb.append("maxMergeDocs=").append(maxMergeDocs).append(", ");
625 sb.append("maxCFSSegmentSizeMB=").append(getMaxCFSSegmentSizeMB()).append(", ");
626 sb.append("noCFSRatio=").append(noCFSRatio);
627 sb.append("]");
628 return sb.toString();
629 }
630
631 }